Libraries

# BiocManager::install("SPIAT")
library(SPIAT)
library(dplyr)
#library(ggspavis)
library(ggplot2)
library(SpatialExperiment)
# colour_pal <- c(
#   "#6600CC",
#   "#CC0066",
#   "#CC0066",
#   "#FF0000",
#   "#FFFF00",
#   "#C65911",
#   "#66FF33",
#   "#66FF99",
#   "#0070C0",
#   "#92D050",
#   "#66FFFF",
#   "#2F02BE",
#   "purple",
#   "brown",
#   "green",
#   "grey"
# )

Identifying Phenotype compartment

# Phenotype labels grouped by tissue compartment; later chunks use these
# vectors to split the cell types found in a sample.
immune_names <- c(
  "B", "CD3 T", "CD4 T", "CD8 T", "DC", "DC/Mono",
  "T reg", "NK", "Mono/Neu", "Neu", "Mac", "Other immune"
)
nonimmune_names <- c("Endothelial", "Epithelial", "Mesenchymal")
# "Other" is the label this analysis treats as the tumour compartment.
tumor_names <- "Other"

Load RDS file of SPE

Load the spatial experiment object created earlier.

# Restore the saved object(s) from the project-relative path; the following
# chunks expect this call to create `spe` in the workspace.
# NOTE(review): load() reads save()-format files, whereas an ".rds" extension
# usually implies saveRDS()/readRDS() — confirm how this file was written.
load(here::here("Output", "Data", "03_SPE", "03_TNBC_2018_spe.rds"))

Check the SpatialExperiment object.

# Give every cell (column) a unique, position-based name "Cell_<i>".
# seq_len(ncol(spe)) replaces 1:dim(assay(spe))[[2]]: it yields an empty
# sequence for a zero-column object instead of c(1, 0), and it does not need
# to extract the assay matrix just to count columns.
colnames(spe) <- paste0("Cell_", seq_len(ncol(spe)))
spe
## class: SpatialExperiment 
## dim: 36 179194 
## metadata(0):
## assays(1): exprs
## rownames(36): betaCatenin CD11b ... SMA Vimentin
## rowData names(0):
## colnames(179194): Cell_1 Cell_2 ... Cell_179193 Cell_179194
## colData names(26): sample_id patient_id ... Si Ta
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
## spatialCoords names(2) : centroidX centroidY
## imgData names(1): sample_id

Define color schemes for each phenotype

We want to define a colour for each phenotype and keep the colours consistent across all samples.

color_vectors <- list()

# Fixed phenotype -> colour lookup. Each colour is tied to its phenotype by
# name rather than by the position of that phenotype in unique(spe$mm), so
# the mapping cannot shift when cells appear in a different order or when a
# data set lacks one of the phenotypes.
# NOTE(review): "CD3 T" and "CD4 T" share "#CC0066" and cannot be told apart
# in plots — confirm this is intended.
phenotype_col <- c(
  "B"            = "#6600CC",
  "CD3 T"        = "#CC0066",
  "CD4 T"        = "#CC0066",
  "CD8 T"        = "#FF0000",
  "DC"           = "#FFFF00",
  "DC/Mono"      = "#C65911",
  "Endothelial"  = "#66FF33",
  "Epithelial"   = "#66FF99",
  "Mac"          = "#0070C0",
  "Mesenchymal"  = "#92D050",
  "Mono/Neu"     = "#66FFFF",
  "Neu"          = "#2F02BE",
  "NK"           = "purple",
  "Other"        = "brown",
  "Other immune" = "green",
  "T reg"        = "grey"
)

# Fail early if the data contain a phenotype that has no colour assigned.
stopifnot(all(as.character(unique(spe$mm)) %in% names(phenotype_col)))
phenotype_col
##            B        CD3 T        CD4 T        CD8 T           DC      DC/Mono 
##    "#6600CC"    "#CC0066"    "#CC0066"    "#FF0000"    "#FFFF00"    "#C65911" 
##  Endothelial   Epithelial          Mac  Mesenchymal     Mono/Neu          Neu 
##    "#66FF33"    "#66FF99"    "#0070C0"    "#92D050"    "#66FFFF"    "#2F02BE" 
##           NK        Other Other immune        T reg 
##     "purple"      "brown"      "green"       "grey"
color_vectors$phenotype <- phenotype_col
# phenotype_col <- data.frame(Type = unique(spe$mm),
#                             Colour = c(
#   "#6600CC",
#   "#CC0066",
#   "#CC0066",
#   "#FF0000",
#   "#FFFF00",
#   "#C65911",
#   "#66FF33",
#   "#66FF99",
#   "#0070C0",
#   "#92D050",
#   "#66FFFF",
#   "#2F02BE",
#   "purple",
#   "brown",
#   "green",
#   "grey"))

Subsetting

Subset spe to a single sample (Sample_04).

# Restrict the object to the cells (columns) of one sample.
spe_1 <- spe[, spe$sample_id == "Sample_04"]

# Per-cell inputs for the SPIAT conversion: centroid coordinates and the
# phenotype label stored in `mm`.
coordX <- spatialCoords(spe_1)[, "centroidX"]
coordY <- spatialCoords(spe_1)[, "centroidY"]
celltype <- spe_1$mm

Transform spe into SPIAT format

We want to transform the colData into the SPIAT format so that we can carry out further analysis with the SPIAT package.

# Build the SPIAT-format object for this sample from the marker intensity
# matrix, the phenotype labels and the centroid coordinates.
spiat_sample1 <- SPIAT::format_image_to_spe(
  format = "general",
  intensity_matrix = assay(spe_1),
  phenotypes = celltype,
  coord_x = coordX,
  coord_y = coordY
)
spiat_sample1
## class: SpatialExperiment 
## dim: 36 5381 
## metadata(0):
## assays(1): ''
## rownames(36): betaCatenin CD11b ... SMA Vimentin
## rowData names(0):
## colnames(5381): Cell_1405 Cell_1406 ... Cell_178421 Cell_178422
## colData names(3): Cell.ID Phenotype sample_id
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
## spatialCoords names(2) : Cell.X.Position Cell.Y.Position
## imgData names(0):

Phenotype compartment in the tissue

# Phenotypes present in this sample, split by the compartment they belong to.
unique_cells <- unique(spiat_sample1$Phenotype)
immune_cells <- unique_cells[unique_cells %in% immune_names]
nonimmune_cells <- unique_cells[unique_cells %in% nonimmune_names]
tumor_cells <- unique_cells[unique_cells %in% tumor_names]

Colour panel for each tissue

We need to customize the colour palette for each tissue, because tissues contain different numbers of unique phenotypes (for SPIAT plotting purposes).

# Pick the colours for the phenotypes present in this sample by indexing the
# global palette with unique_cells, then print the result.
# NOTE(review): presumably unique_cells is a character vector, so this is a
# lookup by name — confirm it is not a factor (which would index by position).
colour_pal <- phenotype_col[unique_cells]
colour_pal
##            B        CD3 T        CD4 T        CD8 T           DC      DC/Mono 
##    "#6600CC"    "#CC0066"    "#CC0066"    "#FF0000"    "#FFFF00"    "#C65911" 
##  Endothelial   Epithelial          Mac  Mesenchymal     Mono/Neu          Neu 
##    "#66FF33"    "#66FF99"    "#0070C0"    "#92D050"    "#66FFFF"    "#2F02BE" 
##           NK        Other Other immune        T reg 
##     "purple"      "brown"      "green"       "grey"

Visualizing tissues

We want to visualize the phenotypes in each tissue using a categorical dot plot.

# Plot the cells of this sample coloured by phenotype, using the per-sample
# palette; the plot title is set to an empty string.
p1 <- plot_cell_categories(
  spe_object = spiat_sample1,
  feature_colname = "Phenotype",
  categories_of_interest = unique(spiat_sample1$Phenotype),
  colour_vector = colour_pal
) +
  ggtitle("")
p1

Basic Analysis

We want to perform some basic analyses on the image.

Cell percentage

We obtain the number and proportion of each cell type and visualize them.

# Number and proportion of every phenotype in the sample; no cell types are
# excluded and no reference cell types are set.
p_cells <- calculate_cell_proportions(
  spe_object = spiat_sample1,
  feature_colname = "Phenotype",
  reference_celltypes = NULL,
  celltypes_to_exclude = NULL,
  plot.image = TRUE
)

p_cells
##       Cell_type Number_of_celltype   Proportion  Percentage Proportion_name
## 14        Other               1586 0.2947407545 29.47407545          /Total
## 10  Mesenchymal                676 0.1256272068 12.56272068          /Total
## 8    Epithelial                599 0.1113175990 11.13175990          /Total
## 3         CD4 T                545 0.1012822895 10.12822895          /Total
## 1             B                515 0.0957071176  9.57071176          /Total
## 15 Other immune                477 0.0886452332  8.86452332          /Total
## 4         CD8 T                279 0.0518490987  5.18490987          /Total
## 5            DC                223 0.0414421111  4.14421111          /Total
## 9           Mac                196 0.0364244564  3.64244564          /Total
## 16        T reg                 86 0.0159821594  1.59821594          /Total
## 6       DC/Mono                 78 0.0144954469  1.44954469          /Total
## 11     Mono/Neu                 61 0.0113361829  1.13361829          /Total
## 12          Neu                 40 0.0074335625  0.74335625          /Total
## 2         CD3 T                 12 0.0022300688  0.22300688          /Total
## 13           NK                  4 0.0007433563  0.07433563          /Total
## 7   Endothelial                  4 0.0007433563  0.07433563          /Total

We also want to plot the cell proportion as bar plots.

# Bar plots of the proportions computed for all phenotypes.
plot_cell_percentages(
  cell_proportions = p_cells,
  cellprop_colname = "Proportion_name",
  cells_to_exclude = NULL
)

We can separate them into different compartments, such as immune, non-immune, and other.

However, these plots are somewhat chaotic. We would like to visualize the cell types within their compartments.

Immune Compartment

Cell percentage

# Proportions within the immune compartment: the non-immune and tumour
# phenotypes found in this sample are excluded before computing shares.
# NOTE(review): the printed table has no "Other immune" row although that
# label is in immune_names — possibly excluding "Other" also removes it;
# confirm how celltypes_to_exclude is matched.
p_immune <- calculate_cell_proportions(
  spe_object = spiat_sample1,
  feature_colname = "Phenotype",
  reference_celltypes = NULL,
  celltypes_to_exclude = c(nonimmune_cells, tumor_cells),
  plot.image = TRUE
)

p_immune
##    Cell_type Number_of_celltype  Proportion Percentage Proportion_name
## 3      CD4 T                545 0.267287886 26.7287886          /Total
## 1          B                515 0.252574792 25.2574792          /Total
## 4      CD8 T                279 0.136831780 13.6831780          /Total
## 5         DC                223 0.109367337 10.9367337          /Total
## 9        Mac                196 0.096125552  9.6125552          /Total
## 16     T reg                 86 0.042177538  4.2177538          /Total
## 6    DC/Mono                 78 0.038254046  3.8254046          /Total
## 11  Mono/Neu                 61 0.029916626  2.9916626          /Total
## 12       Neu                 40 0.019617460  1.9617460          /Total
## 2      CD3 T                 12 0.005885238  0.5885238          /Total
## 13        NK                  4 0.001961746  0.1961746          /Total

Cell proportion

# Bar plots of the immune-compartment proportions.
plot_cell_percentages(
  cell_proportions = p_immune,
  cellprop_colname = "Proportion_name",
  cells_to_exclude = NULL
)

Non-Immune Compartment

Cell percentage

# Proportions within the non-immune compartment: immune and tumour
# phenotypes found in this sample are excluded before computing shares.
p_nonimmune <- calculate_cell_proportions(
  spe_object = spiat_sample1,
  feature_colname = "Phenotype",
  reference_celltypes = NULL,
  celltypes_to_exclude = c(immune_cells, tumor_cells),
  plot.image = TRUE
)

p_nonimmune
##      Cell_type Number_of_celltype  Proportion Percentage Proportion_name
## 10 Mesenchymal                676 0.528537920 52.8537920          /Total
## 8   Epithelial                599 0.468334636 46.8334636          /Total
## 7  Endothelial                  4 0.003127443  0.3127443          /Total

Cell proportion barplot

# Bar plots of the non-immune-compartment proportions.
plot_cell_percentages(
  cell_proportions = p_nonimmune,
  cellprop_colname = "Proportion_name",
  cells_to_exclude = NULL
)

Cell distance

We want to calculate the pairwise distances between two cell types (cell type A and cell type B) and visualize them with a violin plot.

# Pairwise cell-to-cell distances among the non-immune and tumour phenotypes
# of this sample, shown as violin plots.
distance <- calculate_pairwise_distances_between_celltypes(
  spe_object = spiat_sample1,
  feature_colname = "Phenotype",
  cell_types_of_interest = c(nonimmune_cells, tumor_cells)
)
plot_cell_distances_violin(distance)

We also calculate the summary statistics for the distance between each combinations of cell types.

# Summarise the pairwise distances for every reference/target pair
# (the printed table reports mean, min, max, median and standard deviation).
summary_distance <- calculate_summary_distances_between_celltypes(distance)
summary_distance
##                       Pair      Mean       Min      Max    Median  Std.Dev
## 1  Endothelial/Endothelial  633.7053 300.95445 1143.275  599.9288 285.0360
## 2   Endothelial/Epithelial 1087.2253 249.25493 2314.492 1010.5140 352.8898
## 3  Endothelial/Mesenchymal  825.6341  14.65962 2342.063  780.7198 452.6015
## 4        Endothelial/Other 1023.3882  14.85366 2406.227  966.0057 427.0792
## 5   Epithelial/Endothelial 1087.2253 249.25493 2314.492 1010.5140 352.8898
## 6    Epithelial/Epithelial  762.7003  12.00658 2454.521  632.9717 523.0041
## 7   Epithelial/Mesenchymal 1147.0954  15.63464 2729.909 1124.8106 423.4883
## 8         Epithelial/Other 1166.3782  12.63314 2624.434 1222.7692 563.8142
## 9  Mesenchymal/Endothelial  825.6341  14.65962 2342.063  780.7198 452.6015
## 10  Mesenchymal/Epithelial 1147.0954  15.63464 2729.909 1124.8106 423.4883
## 11 Mesenchymal/Mesenchymal  838.7467  10.63704 2759.031  784.1352 456.7852
## 12       Mesenchymal/Other 1012.3537  11.59441 2816.976  982.7881 446.3273
## 13       Other/Endothelial 1023.3882  14.85366 2406.227  966.0057 427.0792
## 14        Other/Epithelial 1166.3782  12.63314 2624.434 1222.7692 563.8142
## 15       Other/Mesenchymal 1012.3537  11.59441 2816.976  982.7881 446.3273
## 16             Other/Other 1105.8901  12.14511 2659.581 1139.9157 544.8262
##      Reference      Target
## 1  Endothelial Endothelial
## 2  Endothelial  Epithelial
## 3  Endothelial Mesenchymal
## 4  Endothelial       Other
## 5   Epithelial Endothelial
## 6   Epithelial  Epithelial
## 7   Epithelial Mesenchymal
## 8   Epithelial       Other
## 9  Mesenchymal Endothelial
## 10 Mesenchymal  Epithelial
## 11 Mesenchymal Mesenchymal
## 12 Mesenchymal       Other
## 13       Other Endothelial
## 14       Other  Epithelial
## 15       Other Mesenchymal
## 16       Other       Other

An interpretation of the above summary would be “the average pairwise distance between Endothelial cells and Epithelial cells is 1087.225”.

# Heatmap of the mean pairwise distance for each pair of cell types.
plot_distance_heatmap(
  metric = "mean",
  phenotype_distances_result = summary_distance
)

Minimum cell distance

Unlike the pairwise distance, which is calculated between all cells of the cell types of interest, here we only identify the distance from each cell of type A to the closest cell of type B.

# Minimum (nearest-cell) distances among the non-immune and tumour
# phenotypes of this sample.
min_dist <- calculate_minimum_distances_between_celltypes(
  spe_object = spiat_sample1,
  feature_colname = "Phenotype",
  cell_types_of_interest = c(nonimmune_cells, tumor_cells)
)
## [1] "Markers had been selected in minimum distance calculation: "
## [1] "Endothelial" "Epithelial"  "Mesenchymal" "Other"

Visualize them with violin plot

# Violin plots of the minimum distances computed above.
plot_cell_distances_violin(min_dist)

# Summary statistics of the minimum distances for each reference/target
# pair, then print the table.
sum_min_dist <- calculate_summary_distances_between_celltypes(min_dist)
sum_min_dist
##                       Pair      Mean       Min        Max    Median    Std.Dev
## 1   Endothelial/Epithelial 352.14926 249.25493  469.94212 344.69999  90.566481
## 2  Endothelial/Mesenchymal  16.13187  14.65962   16.92138  16.47324   1.055473
## 3        Endothelial/Other  36.90058  14.85366   51.75084  40.49891  15.721087
## 4   Epithelial/Endothelial 765.94661 249.25493 1311.36688 766.82740 153.315108
## 5   Epithelial/Mesenchymal 146.31005  15.63464  386.67559 140.02586  76.674329
## 6         Epithelial/Other  34.10005  12.63314  100.40677  29.97670  14.952184
## 7  Mesenchymal/Endothelial 494.45818  14.65962 1340.74121 393.29165 326.746983
## 8   Mesenchymal/Epithelial 316.83685  15.63464  749.59241 297.03752 185.970618
## 9        Mesenchymal/Other  45.15832  11.59441  282.57175  39.24468  26.367324
## 10       Other/Endothelial 698.72427  14.85366 1354.43391 704.14687 270.074215
## 11        Other/Epithelial 133.31777  12.63314  699.26277  64.75419 153.339697
## 12       Other/Mesenchymal  79.74269  11.59441  399.59002  62.66061  58.480358
##      Reference      Target
## 1  Endothelial  Epithelial
## 2  Endothelial Mesenchymal
## 3  Endothelial       Other
## 4   Epithelial Endothelial
## 5   Epithelial Mesenchymal
## 6   Epithelial       Other
## 7  Mesenchymal Endothelial
## 8  Mesenchymal  Epithelial
## 9  Mesenchymal       Other
## 10       Other Endothelial
## 11       Other  Epithelial
## 12       Other Mesenchymal

An interpretation of the above summary would be “the average minimum distance from Endothelial cells to the nearest Epithelial cell is 352.149”.

Plot heatmap.

# Heatmap of the mean minimum distance for each reference/target pair.
plot_distance_heatmap(
  metric = "mean",
  phenotype_distances_result = sum_min_dist
)

Cell colocalisation

We also want to quantify cell colocalisation, which refers to how much two cell types are colocalising and thus potentially interacting. Cell colocalisation metrics allow capturing a dominant spatial pattern in an image.

Cells in Neighbourhood

Here, we calculate the average percentage of cells of one target cell type within a radius of a reference cell type.

# Average percentage of CD4 T cells (target) around B cells (reference);
# no radius is passed, so the function's default radius applies.
average_percentage_of_cells_within_radius(
  spe_object = spiat_sample1,
  reference_celltype = "B",
  target_celltype = "CD4 T",
  feature_colname = "Phenotype"
)
## [1] 16.96122

We can also calculate the average intensity of the target_marker within a radius from the cell positive for the reference marker. (Note that it pools all cells with the target marker that are within the specific radius of any reference cell.)

# Average CD4 intensity within a radius of 30 around cells positive for the
# reference marker CD8.
average_marker_intensity_within_radius(
  spe_object = spiat_sample1,
  reference_marker = "CD8",
  target_marker = "CD4",
  radius = 30
)
## [1] 0.5157241

We can also plot a line graph showing the intensity levels at each specified radius.

# Line graph of the average CD4 intensity around CD8 reference cells at each
# of the listed radii.
plot_average_intensity(
  spe_object = spiat_sample1,
  reference_marker = "CD8",
  target_marker = "CD4",
  radii = c(15, 25, 35, 45, 55, 70, 85, 100)
)

Mixing Score and Normalised Mixing Score

The mixing score was originally defined by Keren et al. (2018) as the number of immune-tumor interactions divided by the number of immune-immune interactions. Here, the mixing score is defined as the number of target-reference interactions divided by the number of reference-reference interactions within a specified radius; the higher the score, the greater the mixing of the two cell types. The normalised score is normalised for the number of target and reference cells in the image.

# Mixing score and normalised mixing score for B cells (target) relative to
# CD4 T cells (reference), counting interactions within a radius of 20.
mixing_score_summary(
  spe_object = spiat_sample1,
  reference_celltype = "CD4 T",
  target_celltype = "B",
  radius = 20,
  feature_colname = "Phenotype"
)
##   Reference Target Number_of_reference_cells Number_of_target_cells
## 2     CD4 T      B                       545                    515
##   Reference_target_interaction Reference_reference_interaction Mixing_score
## 2                          108                             274    0.3941606
##   Normalised_mixing_score
## 2               0.8327121

Cross K functions

Cross K function calculates the number of target cell types across a range of radii from a reference cell type, and compares the behaviour of the input image with an image of randomly distributed points using a Poisson point process. There are four patterns,

  • The red line and black line are close to each other, meaning the two types of points are randomly independently distributed.
  • The red line is under the black line, with a large difference in the middle of the plot, meaning the points are mixed and split into clusters.
  • With the increase of radius, the black line diverges further from the red line, meaning that there is one mixed cluster of two types of points.
  • The red line is above the black line, meaning that the two types of points form separated clusters.
# Cross K function between B and CD4 T cells, evaluated up to a distance of
# 100; the result is kept for the AUC and crossing analyses.
df_cross <- calculate_cross_functions(
  spe_object = spiat_sample1,
  feature_colname = "Phenotype",
  cell_types_of_interest = c("B", "CD4 T"),
  method = "Kcross",
  dist = 100
)

The above pattern is similar to the first pattern listed, which means that cell type B and cell type CD4 T are randomly and independently distributed.

We also want to calculate the area under the curve (AUC) of the cross-K function, to identify whether the two cell types show:

  • negative values: separate clusters
  • positive values: mixing of cell types
# Area under the curve of the cross K result computed above.
AUC_of_cross_function(df.cross = df_cross)
## [1] 0.1703536

Thus, the AUC score is close to zero, which suggests that the two cell types either do not have a spatial relationship or form a ring surrounding a cluster.

Cross-K interaction

Details of such methodologies please see SPIAT vignettes.

# Test whether the cross K curves computed above cross each other; the call
# prints a message describing where the crossing occurs.
crossing_of_crossK(df.cross = df_cross)
## [1] "Crossing of cross K function is detected for this image, indicating a potential immune ring."
## [1] "The crossing happens at the 18% of the specified distance."
## [1] 0.82

The output reports that the crossing happens at 18% of the specified distance (100) of the cross K function (the returned value is 0.82). In the SPIAT vignette, a crossing that lies close to the edge of the tumour cluster is taken to mean that the crossing is not due to randomness in the cell distribution, nor due to two closely located immune and tumour clusters, and therefore indicates an immune ring surrounding the tumour cluster. Note that here the two cell types are B and CD4 T cells rather than immune and tumour cells, so this interpretation should be applied with caution.

Spatial heterogeneity

We want to measure the spatial heterogeneity of patterns as dominant spatial patterns are unlikely to be distributed evenly in a tissue.

Localised Entropy

Entropy in spatial analysis refers to the balance in the number of cells of distinct populations. An entropy score can be obtained for an entire image, but on its own it does not provide any spatial information about the image.

# Entropy of the whole image, computed over every phenotype in the sample.
calculate_entropy(
  spe_object = spiat_sample1,
  feature_colname = "Phenotype",
  cell_types_of_interest = unique(spiat_sample1$Phenotype)
)
## [1] 3.147107

Fishnet Grid

One approach to calculate localised metric is to split the image into fishnet grid squares. For each grid square, grid_metrics() calculates the metric for that square and visualise the raster image. Users can choose any metric as the localised metric.

# Localised entropy: split the image into a fishnet grid (n_split = 20) and
# apply calculate_entropy to each grid square.
grid <- grid_metrics(
  spe_object = spiat_sample1,
  FUN = calculate_entropy,
  n_split = 20,
  feature_colname = "Phenotype",
  cell_types_of_interest = unique(spiat_sample1$Phenotype)
)

Characterize Tissue Structure

We want to understand the spatial distribution of a certain cell type or cell population relative to tissue regions. We will perform an analysis to characterize the immune population in the tumour structure; the approach can be generalised to other tissues and cell types.

Determine if not a clear tumor margin

The following function quantifies the quality of the ‘margin’ by calculating the ratio of tumour bordering cells to tumour cells (R-BC). This ratio is high when there is a disproportionately high number of tumour margin cells compared to internal tumour cells.

# Ratio of bordering cells to cluster cells for the "Other" (tumour) label.
R_BC(
  spe_object = spiat_sample1,
  feature_colname = "Phenotype",
  cell_type_of_interest = "Other"
)

## [1] 0.2629256

The result is 0.2629. This low value means there is a relatively low number of bordering cells compared to the total number of tumour cells, meaning that this image has clear tumour margins.

Automatic identification of the tumor margin

We now want to identify the bordering cells.

# Identify the cells on the border of the "Other" (tumour) clusters.
formatted_border <- identify_bordering_cells(
  spe_object = spiat_sample1,
  feature_colname = "Phenotype",
  reference_cell = "Other"
)
## [1] "The alpha of Polygon is: 68.0375"

We now want to get the number of cell type clusters

# Number of clusters, read from the "n_of_clusters" attribute that is stored
# on the bordering-cells result.
attr(formatted_border, "n_of_clusters")
## [1] 4

There are 4 clusters of "Other" (tumour) cells in the image.

Classification of cells based on locations

We want to define our locations relative to the margin based on distances:

  • Internal Margin: bordering the margin, inside the tumour area
  • External Margin: bordering the margin, surrounding the tumour area
  • Inside: Inside the tumor area
  • Outside: Outside the tumor area

We now calculate the distance of cells to the tumor margin.

# Distance of each cell to the tumour margin, based on the bordering-cells
# result computed above.
formatted_distance <- calculate_distance_to_margin(formatted_border)
## [1] "Markers had been selected in minimum distance calculation: "
## [1] "Non-border" "Border"

Next, we classify cells based on their location. As a distance cutoff, we use a distance of 5 cells from the tumour margin. The function first calculates the average minimum distance between all pairs of nearest cells and then multiplies this number by 5.

# Immune phenotypes that are actually present in this sample.
# The mask must be computed over immune_names itself. The previous version
# indexed immune_names with a logical mask built over the sample's
# phenotypes (a vector of different length and order), which picked labels
# by position and padded the result with NA.
# NOTE: results downstream of define_structure() must be regenerated.
immune_cells <- immune_names[immune_names %in% unique(spiat_sample1$Phenotype)]
immune_cells
##  [1] "B"            "CD3 T"        "CD4 T"        "CD8 T"        "DC"          
##  [6] "DC/Mono"      "T reg"        "NK"           "Mono/Neu"     "Neu"         
## [11] "Mac"          "Other immune"
# Classify cells by their location relative to the tumour margin, using a
# margin width of 5 cell layers and the immune phenotypes as cells of interest.
formatted_structure <- define_structure(
  spe_object = formatted_distance,
  feature_colname = "Phenotype",
  cell_types_of_interest = immune_cells,
  n_margin_layers = 5
)

# Structure categories that occur in this sample.
categories <- unique(formatted_structure$Structure)

We now plot and color these structure categories.

# Plot the cells coloured by their structure category.
plot_cell_categories(
  feature_colname = "Structure",
  spe_object = formatted_structure
)

We also calculate the proportions of immune cells in each of the locations.

# Proportions of each immune phenotype in each structure location.
immune_proportions <- calculate_proportions_of_cells_in_structure(
  spe_object = formatted_structure,
  feature_colname = "Phenotype",
  cell_types_of_interest = immune_names
)
immune_proportions
##                Cell.Type                            Relative_to
## 1                      B             All_cells_in_the_structure
## 2                  CD3 T             All_cells_in_the_structure
## 3                  CD4 T             All_cells_in_the_structure
## 4                  CD8 T             All_cells_in_the_structure
## 5                     DC             All_cells_in_the_structure
## 6                DC/Mono             All_cells_in_the_structure
## 7                  T reg             All_cells_in_the_structure
## 8                     NK             All_cells_in_the_structure
## 9               Mono/Neu             All_cells_in_the_structure
## 10                   Neu             All_cells_in_the_structure
## 11                   Mac             All_cells_in_the_structure
## 12          Other immune             All_cells_in_the_structure
## 13                     B All_cells_of_interest_in_the_structure
## 14                 CD3 T All_cells_of_interest_in_the_structure
## 15                 CD4 T All_cells_of_interest_in_the_structure
## 16                 CD8 T All_cells_of_interest_in_the_structure
## 17                    DC All_cells_of_interest_in_the_structure
## 18               DC/Mono All_cells_of_interest_in_the_structure
## 19                 T reg All_cells_of_interest_in_the_structure
## 20                    NK All_cells_of_interest_in_the_structure
## 21              Mono/Neu All_cells_of_interest_in_the_structure
## 22                   Neu All_cells_of_interest_in_the_structure
## 23                   Mac All_cells_of_interest_in_the_structure
## 24          Other immune All_cells_of_interest_in_the_structure
## 25                     B  The_same_cell_type_in_the_whole_image
## 26                 CD3 T  The_same_cell_type_in_the_whole_image
## 27                 CD4 T  The_same_cell_type_in_the_whole_image
## 28                 CD8 T  The_same_cell_type_in_the_whole_image
## 29                    DC  The_same_cell_type_in_the_whole_image
## 30               DC/Mono  The_same_cell_type_in_the_whole_image
## 31                 T reg  The_same_cell_type_in_the_whole_image
## 32                    NK  The_same_cell_type_in_the_whole_image
## 33              Mono/Neu  The_same_cell_type_in_the_whole_image
## 34                   Neu  The_same_cell_type_in_the_whole_image
## 35                   Mac  The_same_cell_type_in_the_whole_image
## 36          Other immune  The_same_cell_type_in_the_whole_image
## 37 All_cells_of_interest             All_cells_in_the_structure
##    P.Infiltrated.CoI P.Internal.Margin.CoI P.External.Margin.CoI P.Stromal.CoI
## 1        0.001709402           0.033477991           0.108585859   0.224824356
## 2        0.000000000           0.000000000           0.003156566   0.005464481
## 3        0.003418803           0.021078735           0.131944444   0.234192037
## 4        0.011965812           0.028518289           0.080176768   0.077283372
## 5        0.001709402           0.010539368           0.069444444   0.074160812
## 6        0.000000000           0.003099814           0.020202020   0.032006245
## 7        0.000000000           0.000000000           0.000000000   0.000000000
## 8        0.000000000           0.000000000           0.000000000   0.000000000
## 9        0.010256410           0.007439554           0.014520202   0.015612802
## 10       0.000000000           0.000000000           0.000000000   0.000000000
## 11       0.000000000           0.016119033           0.051136364   0.069476971
## 12       0.010256410           0.056416615           0.148358586   0.113192818
## 13       0.043478261           0.189473684           0.173038229   0.265682657
## 14       0.000000000           0.000000000           0.005030181   0.006457565
## 15       0.086956522           0.119298246           0.210261569   0.276752768
## 16       0.304347826           0.161403509           0.127766600   0.091328413
## 17       0.043478261           0.059649123           0.110663984   0.087638376
## 18       0.000000000           0.017543860           0.032193159   0.037822878
## 19       0.000000000           0.000000000           0.000000000   0.000000000
## 20       0.000000000           0.000000000           0.000000000   0.000000000
## 21       0.260869565           0.042105263           0.023138833   0.018450185
## 22       0.000000000           0.000000000           0.000000000   0.000000000
## 23       0.000000000           0.091228070           0.081488934   0.082103321
## 24       0.260869565           0.319298246           0.236418511   0.133763838
## 25       0.001941748           0.104854369           0.333980583   0.559223301
## 26       0.000000000           0.000000000           0.416666667   0.583333333
## 27       0.003669725           0.062385321           0.383486239   0.550458716
## 28       0.025089606           0.164874552           0.455197133   0.354838710
## 29       0.004484305           0.076233184           0.493273543   0.426008969
## 30       0.000000000           0.064102564           0.410256410   0.525641026
## 31       0.000000000           0.000000000           0.000000000   0.000000000
## 32       0.000000000           0.000000000           0.000000000   0.000000000
## 33       0.098360656           0.196721311           0.377049180   0.327868852
## 34       0.000000000           0.000000000           0.000000000   0.000000000
## 35       0.000000000           0.132653061           0.413265306   0.454081633
## 36       0.012578616           0.190775681           0.492662474   0.303983229
## 37       0.039316239           0.176689399           0.627525253   0.846213895

Lastly, we calculate summaries of distance of immune cells in the tumor structure.

# Summary statistics of the distance from immune cells to the tumour border,
# reported separately per area in the printed table.
immune_distance <- calculate_summary_distances_of_cells_to_borders(
  spe_object = formatted_structure,
  feature_colname = "Phenotype",
  cell_types_of_interest = immune_names
)
immune_distance
##                     Cell.Type               Area    Min_d     Max_d    Mean_d
## 1  All_cell_types_of_interest Within_border_area 13.62429 230.11512  51.26510
## 2  All_cell_types_of_interest             Stroma 14.08942 552.34256 146.39155
## 3                           B Within_border_area 17.72247 122.72447  46.13370
## 4                           B             Stroma 14.89693 548.31367 172.66772
## 5                       CD3 T Within_border_area       NA        NA        NA
## 6                       CD3 T             Stroma 25.93917 437.86653 185.69582
## 7                       CD4 T Within_border_area 13.62429 177.98893  44.77514
## 8                       CD4 T             Stroma 14.08942 552.34256 167.43030
## 9                       CD8 T Within_border_area 17.59322 230.11512  54.96983
## 10                      CD8 T             Stroma 14.93929 456.97453 131.65782
## 11                         DC Within_border_area 18.67978 115.19337  49.11441
## 12                         DC             Stroma 14.99489 516.26682 117.35931
## 13                    DC/Mono Within_border_area 20.59185  98.43055  57.56269
## 14                    DC/Mono             Stroma 16.76060 461.88757 163.13484
## 15                      T reg Within_border_area 28.43179 139.50750  62.34038
## 16                      T reg             Stroma 17.67966 405.82424 111.99799
## 17                         NK Within_border_area       NA        NA        NA
## 18                         NK             Stroma 35.98391 338.43103 152.66999
## 19                   Mono/Neu Within_border_area 20.26714 221.18443  76.39486
## 20                   Mono/Neu             Stroma 16.14103 485.11780 113.13285
## 21                        Neu Within_border_area 28.40728 164.83390  92.97730
## 22                        Neu             Stroma 25.04924 459.37861 157.23073
## 23                        Mac Within_border_area 14.63485  84.40512  39.17638
## 24                        Mac             Stroma 16.62943 416.54969 138.59129
## 25               Other immune Within_border_area 15.55036 167.84719  46.25015
## 26               Other immune             Stroma 16.46051 525.12593 119.48438
##     Median_d  St.dev_d
## 1   40.84218  36.30870
## 2  106.60417 114.19024
## 3   43.57616  22.60391
## 4  135.61806 123.65639
## 5         NA        NA
## 6  218.38207 134.10566
## 7   37.24807  36.18568
## 8  131.35823 123.75076
## 9   38.41117  45.77380
## 10  86.92854 105.41112
## 11  47.01128  25.73717
## 12  95.74461  88.89667
## 13  42.86703  37.69157
## 14 116.86031 125.99122
## 15  53.58508  31.90796
## 16  81.36862  93.51937
## 17        NA        NA
## 18 118.13252 134.51916
## 19  51.36990  57.46312
## 20  81.80619  91.49616
## 21  88.35051  47.12003
## 22 158.76148 130.63778
## 23  35.94441  18.55930
## 24 106.11399 101.59468
## 25  36.64403  30.63708
## 26  81.43001  99.34922

Cellular neighbourhood

A cellular neighbourhood is defined as a group of cells that cluster together; it can consist of cells of a single class or of a mixture of cell types.

We want to identify the cellular neighbourhoods; three methods are available: hierarchical clustering, dbscan, and phenograph.

To move forward with the hierarchical clustering algorithm, we first compute the average minimum distance between cells in the tissue.

# Average minimum distance between cells in this sample; used as a guide
# when choosing the neighbourhood radius.
average_minimum_distance(spe_object = spiat_sample1)
## [1] 20.61341

We then identify the neighbourhoods with a radius of 50 and a minimum neighbourhood size of 20.

# Hierarchical-clustering neighbourhoods of immune cells: radius 50 and a
# minimum neighbourhood size of 20.
clusters <- identify_neighborhoods(
  spe_object = spiat_sample1,
  feature_colname = "Phenotype",
  cell_types_of_interest = immune_names,
  method = "hierarchical",
  radius = 50,
  min_neighborhood_size = 20,
  no_pheno = NULL
)

We can also visualize the cell composition of the neighbourhood by obtaining the percentages of cells with a specific phenotype within each neighbourhood and the number of cells in the neighbourhood.

# Phenotype composition of each neighbourhood.
neighborhoods_vis <- composition_of_neighborhoods(
  spe_object = clusters,
  feature_colname = "Phenotype"
)
# Keep only rows whose Total_number_of_cells is at least 5.
neighborhoods_vis <-
  neighborhoods_vis[neighborhoods_vis$Total_number_of_cells >= 5, ]

We then plot the composition of the neighbourhood and show as a heatmap.

# Heatmap of the neighbourhood compositions computed above.
plot_composition_heatmap(
  feature_colname = "Phenotype",
  composition = neighborhoods_vis
)

Average Nearest Neighbourhood Index

We can use ANNI to test the presence of neighbourhood, output whether there is a clear neighbourhood (clustered) or unclear neighbourhood (random/dispersed).

# ANN index for the cells assigned to neighbourhood "Cluster_4",
# tested at p = 0.05.
average_nearest_neighbor_index(
  spe_object = clusters,
  feature_colname = "Neighborhood",
  reference_celltypes = "Cluster_4",
  p_val = 0.05
)
## $ANN_index
## [1] 0.7688743
## 
## $pattern
## [1] "Clustered"
## 
## $`p-value`
## [1] 2.739314e-12
# ANN index for all immune phenotypes taken together, tested at p = 0.05.
average_nearest_neighbor_index(
  spe_object = clusters,
  feature_colname = "Phenotype",
  reference_celltypes = immune_names,
  p_val = 0.05
)
## $ANN_index
## [1] 1.198183
## 
## $pattern
## [1] "Dispersed"
## 
## $`p-value`
## [1] 6.11338e-81